library(readxl)
library(tidyverse)
## ── Attaching packages ──────────────────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.2.1 ✓ purrr 0.3.3
## ✓ tibble 2.1.3 ✓ dplyr 0.8.3
## ✓ tidyr 1.0.0 ✓ stringr 1.4.0
## ✓ readr 1.3.1 ✓ forcats 0.4.0
## ── Conflicts ─────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(gridExtra)
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
#setwd("~/courses/R_Courses/DataCleaning")
#install.packages("htmlwidgets")
A vector can be a “collection” of values or a single value 1. Atomic vector - a collection of values 2. Factors - special vectors that represent categorical data
genotype <- c(rep("WT",5),rep("KO",5))
genotype <- factor(genotype,levels=c("WT","KO"))
genotype
## [1] WT WT WT WT WT KO KO KO KO KO
## Levels: WT KO
summary(genotype)
## WT KO
## 5 5
y <- matrix(1:20, nrow=5,ncol=4,byrow = FALSE)
y
## [,1] [,2] [,3] [,4]
## [1,] 1 6 11 16
## [2,] 2 7 12 17
## [3,] 3 8 13 18
## [4,] 4 9 14 19
## [5,] 5 10 15 20
y <- matrix(1:20, nrow=5,ncol=4,byrow = TRUE)
y
## [,1] [,2] [,3] [,4]
## [1,] 1 2 3 4
## [2,] 5 6 7 8
## [3,] 9 10 11 12
## [4,] 13 14 15 16
## [5,] 17 18 19 20
t(y) #transpose
## [,1] [,2] [,3] [,4] [,5]
## [1,] 1 5 9 13 17
## [2,] 2 6 10 14 18
## [3,] 3 7 11 15 19
## [4,] 4 8 12 16 20
rowSums(y)
## [1] 10 26 42 58 74
colMeans(y)
## [1] 9 10 11 12
d <- c(1,2,3,4)
e <- c("red", "white", "red", NA)
f <- c(TRUE,TRUE,TRUE,FALSE)
x <- data.frame(d,e,f)
names(x) <- c("ID","Color","Passed")
x
library(tibble)
as_tibble(iris)
tibble(x = 1:5, y = 1, z = x ^ 2 + y)
a <- c(1:5)
y <- matrix(1:20, nrow=5,ncol=4,byrow = TRUE)
w <- list(name="Fred", mynumbers=a, mymatrix=y, age=5.3)
w
## $name
## [1] "Fred"
##
## $mynumbers
## [1] 1 2 3 4 5
##
## $mymatrix
## [,1] [,2] [,3] [,4]
## [1,] 1 2 3 4
## [2,] 5 6 7 8
## [3,] 9 10 11 12
## [4,] 13 14 15 16
## [5,] 17 18 19 20
##
## $age
## [1] 5.3
Dataframes is the R native data type for storing a table of mixed datatypes. Tribbles are enhanced dataframes used by the tidyverse packages.
Load the an excel file on death rates and life expectancy
excel_sheets('Death_rates_and_life_expectancy_at_birth.xlsx')
## [1] "NCHS_-_Death_rates_and_life_exp"
deathrates <- read_excel('Death_rates_and_life_expectancy_at_birth.xlsx',sheet = 'NCHS_-_Death_rates_and_life_exp')
Load the CSV file demographics
demographics <- read_csv('demographics.csv')
## Parsed with column specification:
## cols(
## PAT_ID = col_character(),
## GENDER = col_character(),
## BIRTH_DATE = col_character(),
## DEATH_DATE = col_character(),
## RACE = col_character(),
## ETHNICITY = col_character()
## )
Create a CSV file on the fly
read_csv("a,b,c
1,2,3
4,5,6")
Skipping Lines
read_csv("The first line of metadata
The second line of metadata
x,y,z
1,2,3", skip = 2)
read_csv("# A comment I want to skip
x,y,z
1,2,3", comment = "#")
No Header Line
read_csv("1,2,3\n4,5,6", col_names = FALSE)
Setting the Colnames
read_csv("1,2,3\n4,5,6", col_names = c("x", "y", "z"))
readr uses a heuristic to figure out the type of each column
guess_parser("2010-10-01")
## [1] "date"
guess_parser("15:01")
## [1] "time"
guess_parser(c("TRUE", "FALSE"))
## [1] "logical"
guess_parser(c("1", "5", "9"))
## [1] "double"
guess_parser(c("12,352,561"))
## [1] "number"
str(parse_guess("2010-10-10"))
## Date[1:1], format: "2010-10-10"
readr allows you to set the data types: - col_guess() - the default - col_character() - col_double(), col_euro_double() • col_datetime(format = "“) Also - col_date(format =”“), col_time(format =”") - col_factor(levels, ordered = FALSE) - col_integer() - col_logical() - col_number(), col_numeric() - col_skip()
x <- read_csv('demographics.csv',col_types=cols(PAT_ID=col_character(),GENDER=col_character(),BIRTH_DATE=col_date(),DEATH_DATE=col_date(), RACE=col_character(),ETHNICITY=col_character()))
## Warning: 23 parsing failures.
## row col expected actual file
## 1 BIRTH_DATE date like 27-JUN-57 'demographics.csv'
## 2 BIRTH_DATE date like 14-MAR-54 'demographics.csv'
## 2 DEATH_DATE date like 26-MAR-12 'demographics.csv'
## 3 BIRTH_DATE date like 13-JUN-40 'demographics.csv'
## 4 BIRTH_DATE date like 20-APR-81 'demographics.csv'
## ... .......... .......... ......... ..................
## See problems(...) for more details.
write_csv(demographics, "test1.csv")
write_rds(demographics, "demographics.rds")
A table is tidy if:
Tidy Data:
p1 <- ggplot(deathrates, aes(x=Race, y=AverageLifeExpectancy_Years,color=Race)) + geom_boxplot()
p2 <- ggplot(deathrates, aes(x=Sex, y=AverageLifeExpectancy_Years,color=Sex)) + geom_boxplot()
grid.arrange(p1,p2,nrow=1)
## Warning: Removed 9 rows containing non-finite values (stat_boxplot).
## Warning: Removed 9 rows containing non-finite values (stat_boxplot).
There are several ways to deal with missing data:
nrow(deathrates)
## [1] 1044
t1 <- drop_na(deathrates,AverageLifeExpectancy_Years)
nrow(t1)
## [1] 1035
t2 <- replace_na(deathrates,list(AverageLifeExpectancy_Years = 68))
head(deathrates)
head(t2)
t3 <- fill(deathrates,AverageLifeExpectancy_Years)
nrow(drop_na(deathrates))
## [1] 1035
nrow(drop_na(t3))
## [1] 1043
head(t3)
Regular expressions, or regexps, are a concise language for describing patterns in strings.
str <- c("Mouse:C57BL:WT:001","Mouse:C57BL:WT:002","Mouse:C57BL:KO:001","Mouse:C57BL:KO:002","Mouse:B6129SF2:WT:001","Mouse:B6129SF2:WT:002","Mouse:B6129SF2:KO:001","Mouse:B6129SF2:KO:002")
str_view_all(str,"M\\w")
str_view_all(str,"M\\w+")
str_view_all(str,"M|K")
str_view_all(str,"[MWK]")
str_view_all(str,"\\d+")
str_view_all(str,"M[:alpha:]")
str_view_all(str,"M[:alpha:]+")
str_view_all(str,"[:digit:]+")
str_view_all(str,"BL*")
str_view_all(str,"[:digit:]{2}")
str_view_all(str,"^Mouse:C")
str_view_all(str,"001$")
Coding in Regex
Here is an example of the sequences names from a fastq file:
NB551050:207:HVWYGBGX9:1:12103:8448:7970:CGTTTCCT NB551050:207:HVWYGBGX9:1:12103:8448:7970:CGTTTCCT NB551050:207:HVWYGBGX9:1:12107:14501:17909:GCTAATGG NB551050:207:HVWYGBGX9:1:12107:14501:17909:GCTAATGG NB551050:207:HVWYGBGX9:4:11401:4511:6993:ATGCTTTA NB551050:207:HVWYGBGX9:4:11401:4511:6993:ATGCTTTA NB551050:207:HVWYGBGX9:2:11304:2936:5123:CTTTGTAT NB551050:207:HVWYGBGX9:4:12609:24035:16440:CTTTGTAT NB551050:207:HVWYGBGX9:2:11304:2936:5123:CTTTGTAT NB551050:207:HVWYGBGX9:4:12609:24035:16440:CTTTGTAT
Detecting Matches
str_detect(demographics$GENDER,'F')
## [1] TRUE TRUE TRUE TRUE FALSE TRUE FALSE FALSE TRUE FALSE TRUE FALSE
## [13] FALSE TRUE TRUE FALSE FALSE TRUE FALSE TRUE
str_which(demographics$GENDER,'F')
## [1] 1 2 3 4 6 9 11 14 15 18 20
str_count(demographics$GENDER,'F')
## [1] 1 1 1 1 0 1 0 0 1 0 1 0 0 1 1 0 0 1 0 1
str_locate(demographics$GENDER,'F')
## start end
## [1,] 1 1
## [2,] 1 1
## [3,] 1 1
## [4,] 1 1
## [5,] NA NA
## [6,] 1 1
## [7,] NA NA
## [8,] NA NA
## [9,] 1 1
## [10,] NA NA
## [11,] 1 1
## [12,] NA NA
## [13,] NA NA
## [14,] 1 1
## [15,] 1 1
## [16,] NA NA
## [17,] NA NA
## [18,] 1 1
## [19,] NA NA
## [20,] 1 1
String Length
teststr <- c("banana","strawberry","pineapple", " apple ")
str_length(teststr)
## [1] 6 10 9 7
str_trunc(teststr,6)
## [1] "banana" "str..." "pin..." " ap..."
str_trim(teststr)
## [1] "banana" "strawberry" "pineapple" "apple"
str_pad(teststr,10)
## [1] " banana" "strawberry" " pineapple" " apple "
Order
str_order(teststr)
## [1] 4 1 3 2
str_sort(teststr)
## [1] " apple " "banana" "pineapple" "strawberry"
Capitilization
str_to_upper(teststr)
## [1] "BANANA" "STRAWBERRY" "PINEAPPLE" " APPLE "
str_to_lower(teststr)
## [1] "banana" "strawberry" "pineapple" " apple "
str_to_title(str_c(teststr, sep=" ",collapse = " "))
## [1] "Banana Strawberry Pineapple Apple "
Concat and Split
str_c(teststr, teststr, sep=" ")
## [1] "banana banana" "strawberry strawberry" "pineapple pineapple"
## [4] " apple apple "
str_c(teststr, collapse = " ")
## [1] "banana strawberry pineapple apple "
str_dup(teststr,3)
## [1] "bananabananabanana" "strawberrystrawberrystrawberry"
## [3] "pineapplepineapplepineapple" " apple apple apple "
firstname <- "Firstname"
lastname <- "Lastname"
course <- "Data Science for Biologist"
startdate <- as.Date("2020-01-10")
str_glue(
"Dear {firstname} {lastname}: ",
"Congratulations, you have been accepted into the {course} Nanocourse, ",
"starting on {format(startdate, '%A, %B %d, %Y')}."
)
## Dear Firstname Lastname: Congratulations, you have been accepted into the Data Science for Biologist Nanocourse, starting on Friday, January 10, 2020.
str_split(teststr,"",4)
## [[1]]
## [1] "b" "a" "n" "ana"
##
## [[2]]
## [1] "s" "t" "r" "awberry"
##
## [[3]]
## [1] "p" "i" "n" "eapple"
##
## [[4]]
## [1] " " "a" "p" "ple "
str_split(teststr,"a")
## [[1]]
## [1] "b" "n" "n" ""
##
## [[2]]
## [1] "str" "wberry"
##
## [[3]]
## [1] "pine" "pple"
##
## [[4]]
## [1] " " "pple "
str_split_fixed(teststr,"",4)
## [,1] [,2] [,3] [,4]
## [1,] "b" "a" "n" "ana"
## [2,] "s" "t" "r" "awberry"
## [3,] "p" "i" "n" "eapple"
## [4,] " " "a" "p" "ple "
Subset
str <- c("Mouse:C57BL:WT:001","Mouse:C57BL:WT:002","Mouse:C57BL:KO:001","Mouse:C57BL:KO:002","Mouse:B6129SF2:WT:001","Mouse:B6129SF2:WT:002","Mouse:B6129SF2:KO:001","Mouse:B6129SF2:KO:002")
str_sub(str,6)
## [1] ":C57BL:WT:001" ":C57BL:WT:002" ":C57BL:KO:001" ":C57BL:KO:002"
## [5] ":B6129SF2:WT:001" ":B6129SF2:WT:002" ":B6129SF2:KO:001" ":B6129SF2:KO:002"
str_sub(str,-6)
## [1] "WT:001" "WT:002" "KO:001" "KO:002" "WT:001" "WT:002" "KO:001" "KO:002"
str_sub(str,1,4)
## [1] "Mous" "Mous" "Mous" "Mous" "Mous" "Mous" "Mous" "Mous"
str_subset(str,"1")
## [1] "Mouse:C57BL:WT:001" "Mouse:C57BL:KO:001" "Mouse:B6129SF2:WT:001"
## [4] "Mouse:B6129SF2:WT:002" "Mouse:B6129SF2:KO:001" "Mouse:B6129SF2:KO:002"
str_match(str,"1")
## [,1]
## [1,] "1"
## [2,] NA
## [3,] "1"
## [4,] NA
## [5,] "1"
## [6,] "1"
## [7,] "1"
## [8,] "1"
str_match_all(str,"1")
## [[1]]
## [,1]
## [1,] "1"
##
## [[2]]
## [,1]
##
## [[3]]
## [,1]
## [1,] "1"
##
## [[4]]
## [,1]
##
## [[5]]
## [,1]
## [1,] "1"
## [2,] "1"
##
## [[6]]
## [,1]
## [1,] "1"
##
## [[7]]
## [,1]
## [1,] "1"
## [2,] "1"
##
## [[8]]
## [,1]
## [1,] "1"
Check the gender categories in the demographics table
unique(demographics$GENDER)
## [1] "FEMALE" "F" "M" "MALE" "FE"
Mutate Strings
Replace Data Entry Mistakes
Update the Gender Column to Make 2 GENDER Categories M and F
demographics$GENDER <- str_replace(demographics$GENDER,'F.+','F')
demographics$GENDER <- str_replace(demographics$GENDER,'MALE','M')
unique(demographics$GENDER)
## [1] "F" "M"
deathrates$Race <- str_replace(deathrates$Race,'All Races','All')
head(deathrates)
filter(deathrates,Year==1900)
dr1 <- spread(deathrates, Race, AverageLifeExpectancy_Years)
dr2 <- gather(dr1, `All`, `Black`, `White`, key = "Race", value = "AvgLifeExpectancy")
head(dr1)
head(dr2)
df1 <- data.frame(rows = c("one","two","three"),two = 1:3,three=1:3)
df1
df2 <- df1 %>% gather(newrows,valname,-rows) %>% spread(rows,valname)
df2
dm1 <- separate(demographics, BIRTH_DATE, into = c("BirthDayOfMonth", "BirthMonth","BirthYear"))
head(dm1)
dm2 <- unite(demographics,RACE,ETHNICITY,col="Population",sep=' ')
head(dm2)
separate_rows allows for the separation to generate multiple rows
deathrates[is.na(deathrates$AverageLifeExpectancy_Years),]
t1[is.na(t1$AverageLifeExpectancy_Years),]
t3 <- complete(t1,Year=1900:2015,Race=c('All','Black','White'),Sex=c('Both Sexes','Male','Female'))
t1[is.na(t1$AverageLifeExpectancy_Years),]
t3[is.na(t3$AverageLifeExpectancy_Years),]
t4 <- complete(t1,Year=1900:2015,Race=c('All','Black','White'),Sex=c('Both Sexes','Male','Female'),fill=list(AverageLifeExpectancy_Years=68))
t4[t4$Year>2014,]
df <- tibble(
year = c(2010, 2010, 2010, 2010, 2012, 2012, 2012),
qtr = c( 1, 2, 3, 4, 1, 2, 3),
return = rnorm(7)
)
df %>% expand(year = full_seq(2010:2012, 1), qtr)
# #
#
bmi <- rnorm(20,mean=30,sd=5)
pat_id <- dm2$PAT_ID
pat.bmi <- bind_cols(as.data.frame(pat_id),as.data.frame(bmi))
head(pat.bmi)
dm3 <- merge(dm2,pat.bmi,by.x='PAT_ID',by.y='pat_id')
head(dm3)
ggplot(dm3,aes(x=GENDER, y=bmi,color=GENDER)) + geom_boxplot()
#
print(mtcars)
## mpg cyl disp hp drat wt qsec vs am gear carb
## Mazda RX4 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4
## Mazda RX4 Wag 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4
## Datsun 710 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1
## Hornet 4 Drive 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1
## Hornet Sportabout 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2
## Valiant 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1
## Duster 360 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4
## Merc 240D 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2
## Merc 230 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2
## Merc 280 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4
## Merc 280C 17.8 6 167.6 123 3.92 3.440 18.90 1 0 4 4
## Merc 450SE 16.4 8 275.8 180 3.07 4.070 17.40 0 0 3 3
## Merc 450SL 17.3 8 275.8 180 3.07 3.730 17.60 0 0 3 3
## Merc 450SLC 15.2 8 275.8 180 3.07 3.780 18.00 0 0 3 3
## Cadillac Fleetwood 10.4 8 472.0 205 2.93 5.250 17.98 0 0 3 4
## Lincoln Continental 10.4 8 460.0 215 3.00 5.424 17.82 0 0 3 4
## Chrysler Imperial 14.7 8 440.0 230 3.23 5.345 17.42 0 0 3 4
## Fiat 128 32.4 4 78.7 66 4.08 2.200 19.47 1 1 4 1
## Honda Civic 30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 2
## Toyota Corolla 33.9 4 71.1 65 4.22 1.835 19.90 1 1 4 1
## Toyota Corona 21.5 4 120.1 97 3.70 2.465 20.01 1 0 3 1
## Dodge Challenger 15.5 8 318.0 150 2.76 3.520 16.87 0 0 3 2
## AMC Javelin 15.2 8 304.0 150 3.15 3.435 17.30 0 0 3 2
## Camaro Z28 13.3 8 350.0 245 3.73 3.840 15.41 0 0 3 4
## Pontiac Firebird 19.2 8 400.0 175 3.08 3.845 17.05 0 0 3 2
## Fiat X1-9 27.3 4 79.0 66 4.08 1.935 18.90 1 1 4 1
## Porsche 914-2 26.0 4 120.3 91 4.43 2.140 16.70 0 1 5 2
## Lotus Europa 30.4 4 95.1 113 3.77 1.513 16.90 1 1 5 2
## Ford Pantera L 15.8 8 351.0 264 4.22 3.170 14.50 0 1 5 4
## Ferrari Dino 19.7 6 145.0 175 3.62 2.770 15.50 0 1 5 6
## Maserati Bora 15.0 8 301.0 335 3.54 3.570 14.60 0 1 5 8
## Volvo 142E 21.4 4 121.0 109 4.11 2.780 18.60 1 1 4 2
as.tibble(mtcars)
## Warning: `as.tibble()` is deprecated, use `as_tibble()` (but mind the new semantics).
## This warning is displayed once per session.
data.frame
df <- data.frame(abc = 1, xyz = "a")
df$xyz
## [1] a
## Levels: a
df[, "xyz"]
## [1] a
## Levels: a
df[, c("abc", "xyz")]
df <- tibble(abc=1,xyz='a')
df$xyz
## [1] "a"
df[['xyz']]
## [1] "a"
df[, c("abc", "xyz")]
cars <- as.tibble(mtcars)
cars$mpg
## [1] 21.0 21.0 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 17.8 16.4 17.3 15.2 10.4
## [16] 10.4 14.7 32.4 30.4 33.9 21.5 15.5 15.2 13.3 19.2 27.3 26.0 30.4 15.8 19.7
## [31] 15.0 21.4
annoying <- tibble(`1` = 1:10,`2` = `1` * 2 + rnorm(length(1)))
annoying$`1`
## [1] 1 2 3 4 5 6 7 8 9 10
ggplot(annoying,aes(x=`1`,y=`2`)) + geom_point()
annoying$`3` <- annoying$`2`/annoying$`1`
colnames(annoying) <- c("one","two","three")
annoying
enframe(annoying$one)
What function would you use to read a file where fields were separated with “|”?
read_delim
What sort of data can be read using read_fwf()?
Read A Fixed Width File Into A Tibble
x,y 1,‘a,b’
read_csv('x,y\n1,"a,b"')
read_csv("a,b,\n1,2,3\n4,5,6")
## Warning: Missing column names filled in: 'X3' [3]
read_csv("a,b,c\n1,2,\n1,2,3,4")
## Warning: 1 parsing failure.
## row col expected actual file
## 2 -- 3 columns 4 columns literal data
read_csv("a,b\n\"1,")
## Warning: 2 parsing failures.
## row col expected actual file
## 1 a closing quote at end of file literal data
## 1 -- 2 columns 1 columns literal data
read_csv("a,b\n1,2\na,b")
read_delim("a;b\n1;3",delim=';')
read_csv("a,b,c\n1,2,3\n4,5,6")
read_csv("a,b,c,d\n1,2,NA,NA\n1,2,3,4")
read_csv("a,b\n1,NA")
read_csv("a,b\n1,2\na,b")
read_delim("a;b\n1;3",delim=';')
ex1 <- read_csv("a,b,c,d\n1,2,NA,NA\n1,2,3,4")
write_csv(ex1,'example_file.csv')
demographics <- read_csv('demographics.csv')
## Parsed with column specification:
## cols(
## PAT_ID = col_character(),
## GENDER = col_character(),
## BIRTH_DATE = col_character(),
## DEATH_DATE = col_character(),
## RACE = col_character(),
## ETHNICITY = col_character()
## )
demographics$RACE <- str_replace(demographics$RACE,'AA','Black')
demographics$RACE <- str_replace(demographics$RACE,'Caucasian','White')
demographics$RACE
## [1] "White" "Black" "Black" "White" "White" "Unknown"
## [7] "White" "AmIndAN" "Unknown" "Asian" "White" "White"
## [13] "White" "Other" "Unknown" "Declined" "White" "White"
## [19] "Asian" "HawPacIs"
dm1 <- separate(demographics, BIRTH_DATE, into = c("BirthDayOfMonth", "BirthMonth","BirthYear"))
dm2 <- separate(dm1, DEATH_DATE, into = c("DeathDayOfMonth", "DeathMonth","DeathYear"))
dm4 <- replace_na(dm2,list(DeathYear = 20))
dm4$BirthYear <- as.numeric(dm4$BirthYear) + 1900
dm4$DeathYear <- as.numeric(dm4$DeathYear) + 2000 #assume all deaths happen after 2000
dm4$age <- as.numeric(dm4$DeathYear) - dm4$BirthYear
dm4$bmi <- rnorm(20,mean=30,sd=5)
dm4
ggplot(dm4,aes(x=age,y=bmi)) + geom_point()
dm3 <- separate(dm1, DEATH_DATE, into = c("DeathDayOfMonth", "DeathMonth","DeathYear"))
excel_sheets('CausesOfDeathUSA.xlsx')
## [1] "Sheet 1 - NCHS_-_Leading_Causes"
causedeath <- read_excel('CausesOfDeathUSA.xlsx',sheet = 'Sheet 1 - NCHS_-_Leading_Causes',skip=1)
causedeath
cd <- separate(causedeath, `113 Cause Name`, into = c("SubCauseName","SubCauseCode"))
## Warning: Expected 2 pieces. Additional pieces discarded in 9360 rows [1, 2, 3,
## 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
cd